{
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "# Principal Component Analysis (PCA)"
      ],
      "metadata": {}
    },
    {
      "cell_type": "markdown",
      "source": [
        "Principal component analysis (PCA) is a method used to emphasize variation and bring out strong patterns in a dataset. It is often used to make data easier to explore and visualize."
      ],
      "metadata": {}
    },
    {
      "cell_type": "code",
      "source": [
        "import numpy as np\n",
        "import pandas as pd\n",
        "import matplotlib.pyplot as plt\n",
        "\n",
        "import warnings\n",
        "warnings.filterwarnings(\"ignore\")\n",
        "\n",
        "# fix_yahoo_finance is used to fetch data \n",
        "import fix_yahoo_finance as yf\n",
        "yf.pdr_override()"
      ],
      "outputs": [],
      "execution_count": 1,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# input\n",
        "symbol = 'AMD'\n",
        "start = '2014-01-01'\n",
        "end = '2018-08-27'\n",
        "\n",
        "# Read data \n",
        "dataset = yf.download(symbol,start,end)\n",
        "\n",
        "# View Columns\n",
        "dataset.head()"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[*********************100%***********************]  1 of 1 downloaded\n"
          ]
        },
        {
          "output_type": "execute_result",
          "execution_count": 2,
          "data": {
            "text/plain": [
              "            Open  High   Low  Close  Adj Close    Volume\n",
              "Date                                                    \n",
              "2014-01-02  3.85  3.98  3.84   3.95       3.95  20548400\n",
              "2014-01-03  3.98  4.00  3.88   4.00       4.00  22887200\n",
              "2014-01-06  4.01  4.18  3.99   4.13       4.13  42398300\n",
              "2014-01-07  4.19  4.25  4.11   4.18       4.18  42932100\n",
              "2014-01-08  4.23  4.26  4.14   4.18       4.18  30678700"
            ],
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>Open</th>\n",
              "      <th>High</th>\n",
              "      <th>Low</th>\n",
              "      <th>Close</th>\n",
              "      <th>Adj Close</th>\n",
              "      <th>Volume</th>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>Date</th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>2014-01-02</th>\n",
              "      <td>3.85</td>\n",
              "      <td>3.98</td>\n",
              "      <td>3.84</td>\n",
              "      <td>3.95</td>\n",
              "      <td>3.95</td>\n",
              "      <td>20548400</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2014-01-03</th>\n",
              "      <td>3.98</td>\n",
              "      <td>4.00</td>\n",
              "      <td>3.88</td>\n",
              "      <td>4.00</td>\n",
              "      <td>4.00</td>\n",
              "      <td>22887200</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2014-01-06</th>\n",
              "      <td>4.01</td>\n",
              "      <td>4.18</td>\n",
              "      <td>3.99</td>\n",
              "      <td>4.13</td>\n",
              "      <td>4.13</td>\n",
              "      <td>42398300</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2014-01-07</th>\n",
              "      <td>4.19</td>\n",
              "      <td>4.25</td>\n",
              "      <td>4.11</td>\n",
              "      <td>4.18</td>\n",
              "      <td>4.18</td>\n",
              "      <td>42932100</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2014-01-08</th>\n",
              "      <td>4.23</td>\n",
              "      <td>4.26</td>\n",
              "      <td>4.14</td>\n",
              "      <td>4.18</td>\n",
              "      <td>4.18</td>\n",
              "      <td>30678700</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ]
          },
          "metadata": {}
        }
      ],
      "execution_count": 2,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "dataset['Increase_Decrease'] = np.where(dataset['Volume'].shift(-1) > dataset['Volume'],1,0)\n",
        "dataset['Buy_Sell_on_Open'] = np.where(dataset['Open'].shift(-1) > dataset['Open'],1,0)\n",
        "dataset['Buy_Sell'] = np.where(dataset['Adj Close'].shift(-1) > dataset['Adj Close'],1,0)\n",
        "dataset['Returns'] = dataset['Adj Close'].pct_change()\n",
        "dataset = dataset.dropna()"
      ],
      "outputs": [],
      "execution_count": 3,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from numpy import array\n",
        "from numpy import mean\n",
        "from numpy import cov\n",
        "from numpy.linalg import eig"
      ],
      "outputs": [],
      "execution_count": 4,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Manual PCA on three price series.\n",
        "# Data matrix A: one row per trading day (observation), one column per feature.\n",
        "A = dataset[['Adj Close', 'High', 'Low']].values\n",
        "# calculate the mean of each column (one mean per feature)\n",
        "M = mean(A, axis=0)\n",
        "print(M)\n",
        "# center columns by subtracting column means\n",
        "C = A - M\n",
        "print(C)\n",
        "# covariance matrix of the features (3 x 3); rowvar=False because variables are in columns\n",
        "V = cov(C, rowvar=False)\n",
        "print(V)\n",
        "# eigendecomposition of the covariance matrix; V is symmetric, so eigh is used\n",
        "# to get real eigenvalues and eigenvectors\n",
        "values, vectors = np.linalg.eigh(V)\n",
        "# eigh returns eigenvalues in ascending order; reorder so the component with\n",
        "# the largest variance comes first\n",
        "order = values.argsort()[::-1]\n",
        "values = values[order]\n",
        "vectors = vectors[:, order]\n",
        "print(vectors)\n",
        "print(values)\n",
        "# project the centered data onto the principal axes (each row of P is one component)\n",
        "P = vectors.T.dot(C.T)\n",
        "print(P.T)"
      ],
      "outputs": [],
      "execution_count": null,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn.decomposition import PCA\n",
        "\n",
        "# scikit-learn expects an array of shape (n_samples, n_features): one row per\n",
        "# trading day and one column per price series. The matrix is built here so this\n",
        "# cell does not depend on the orientation of an array defined in another cell.\n",
        "price_matrix = dataset[['Adj Close', 'High', 'Low']].values\n",
        "# create the PCA instance, keeping the two leading components\n",
        "pca = PCA(2)\n",
        "# fit on data\n",
        "pca.fit(price_matrix)\n",
        "# access vectors (principal axes) and values (variance explained by each axis)\n",
        "print(pca.components_)\n",
        "print(pca.explained_variance_)\n",
        "# transform data: project every trading day onto the two principal axes\n",
        "B = pca.transform(price_matrix)\n",
        "print(B)"
      ],
      "outputs": [],
      "execution_count": null,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    }
  ],
  "metadata": {
    "kernel_info": {
      "name": "python3"
    },
    "language_info": {
      "file_extension": ".py",
      "codemirror_mode": {
        "version": 3,
        "name": "ipython"
      },
      "pygments_lexer": "ipython3",
      "nbconvert_exporter": "python",
      "version": "3.5.5",
      "mimetype": "text/x-python",
      "name": "python"
    },
    "kernelspec": {
      "name": "python3",
      "language": "python",
      "display_name": "Python 3"
    },
    "nteract": {
      "version": "0.15.0"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}